# Import Libraries
library(knitr)
library(psych)
library(kableExtra)
library(tidyquant)
library(quantmod)
library(tseries)
library(tidyverse)
library(GGally)
library(ggplot2)
library(fpp2)
library(dplyr)
library(quantmod)
library(astsa)
library(NTS)
library(fGarch)
library(rugarch)
library(forecast)
# Import Dataset
# Download daily NFLX OHLCV data (plus adjusted close) from Yahoo Finance
# for the five-year window ending 2021-11-15.
nflx_df <- getSymbols.yahoo(
  Symbols = "NFLX",
  env = .GlobalEnv,
  src = "yahoo",
  from = "2016-11-15",
  to = "2021-11-16",
  auto.assign = FALSE
)
head(nflx_df)
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2016-11-15 114.55 116.41 113.09 113.59 7445100 113.59
## 2016-11-16 112.96 116.12 111.81 115.19 5933700 115.19
## 2016-11-17 115.13 116.81 113.56 115.03 6232700 115.03
## 2016-11-18 115.73 116.42 113.51 115.21 6746800 115.21
## 2016-11-21 116.20 118.72 116.19 117.96 7064500 117.96
## 2016-11-22 118.32 119.46 116.98 118.04 7007200 118.04
tail(nflx_df)
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2021-11-08 650.29 656.00 643.79 651.45 2887500 651.45
## 2021-11-09 653.70 660.50 650.52 655.99 2415600 655.99
## 2021-11-10 653.01 660.33 642.11 646.91 2405800 646.91
## 2021-11-11 650.24 665.82 649.71 657.58 2868300 657.58
## 2021-11-12 660.01 683.34 653.82 682.61 4192700 682.61
## 2021-11-15 681.24 685.26 671.49 679.33 2872200 679.33
# Create new dataframe to preserve original dataset and assign new variable.
nflx <- nflx_df
# Check for Missing Variables
# colSums() over is.na() counts NA cells per column; all six columns are
# complete (zero missing values, per the output below).
colSums(is.na(nflx))
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume
## 0 0 0 0 0
## NFLX.Adjusted
## 0
# Check structure to determine the shape and data types of the data set:
# an xts object of 1259 daily rows x 6 numeric columns, indexed by Date.
str(nflx)
## An 'xts' object on 2016-11-15/2021-11-15 containing:
## Data: num [1:1259, 1:6] 115 113 115 116 116 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:6] "NFLX.Open" "NFLX.High" "NFLX.Low" "NFLX.Close" ...
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## List of 2
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2021-12-06 21:06:14"
Moving forward, as specified in the exploratory data analysis section of the paper, research shows that the adjusted closing price is the most accurate reflection of the true value of a stock. On most stock price websites that do not disclose both the closing and adjusted closing prices, the closing price shown is the adjusted price. Based on this, NFLX.Adjusted and the dates will be used to predict stock prices.
# Keep only the adjusted close column (quantmod's Ad() accessor) for modeling.
adj_nflx <- Ad(nflx)
head(adj_nflx)
## NFLX.Adjusted
## 2016-11-15 113.59
## 2016-11-16 115.19
## 2016-11-17 115.03
## 2016-11-18 115.21
## 2016-11-21 117.96
## 2016-11-22 118.04
summary(adj_nflx)
## Index NFLX.Adjusted
## Min. :2016-11-15 Min. :113.6
## 1st Qu.:2018-02-15 1st Qu.:262.9
## Median :2019-05-20 Median :348.5
## Mean :2019-05-18 Mean :351.1
## 3rd Qu.:2020-08-17 3rd Qu.:485.1
## Max. :2021-11-15 Max. :690.3
The summary shows that within the span of five years, Netflix's stock price has fluctuated over a range of 576.7 (690.3 − 113.6), with the average being around 351.1.
# Stack the trading-volume and adjusted-close series on a two-row panel.
par(mfrow = c(2, 1))
plot(nflx$NFLX.Volume, type = "l", ylab = "Volume", main = NULL, cex.axis = 0.5)
mtext(text = "Volume of Netflix Stock Over Time", side = 3, line = 1.5, cex = 1, font = 2)
plot(adj_nflx, type = "l", ylab = "Close Prices", main = NULL, cex.axis = 0.5)
mtext(text = "Close Prices over Time", side = 3, line = 1.5, cex = 1, font = 2)
ADF (Augmented Dickey-Fuller) Test. Null hypothesis H0: the time series is non-stationary. Alternative hypothesis HA: the time series is stationary. If the p-value is less than the significance level (0.05), reject the null hypothesis and conclude that the time series is stationary. Since the p-value is 0.5738, we fail to reject the null hypothesis, which indicates that this time series is non-stationary.
# Augmented Dickey-Fuller test on the raw adjusted-close series.
# p = 0.5738 > 0.05 (see below), so the unit-root null cannot be rejected:
# the level series is non-stationary and needs differencing.
print(adf.test(adj_nflx))
##
## Augmented Dickey-Fuller Test
##
## data: adj_nflx
## Dickey-Fuller = -2.0107, Lag order = 10, p-value = 0.5738
## alternative hypothesis: stationary
Differencing
# First difference of the log prices, i.e. daily log returns.
adf_diff <- diff(log(adj_nflx), lag = 1)
# Fix: drop the leading NA produced by diff() instead of back-filling it.
# The original na.locf(..., fromLast = TRUE) copied the first observed return
# into the NA slot, duplicating an observation at the start of the series.
adf_diff <- na.omit(adf_diff)
plot(adf_diff, main = NULL)
mtext(side = 3, text = 'Logged Difference', line = 1.5, cex = 1, font = 2)
ACF (Autocorrelation Function)
# Autocorrelation function of the daily log returns.
acf_nflx <- acf(adf_diff, main = "ACF")
PACF (Partial Autocorrelation Function)
# Partial autocorrelation function of the daily log returns.
pacf_nflx <- pacf(adf_diff, main = "PACF")
Adjusted Closing Price vs Difference
The adjusted closing price shows a relatively steady rise as time increases. When taking the difference, there are some spikes within the data. Because of this, we will take the logged difference to see if there is less variance.
# Compare the level series with its first difference on one panel.
par(mfrow = c(2, 1))
plot(adj_nflx, col = "cornflowerblue", main = "Adjusted Closing Price")
plot(diff(adj_nflx), col = "cornflowerblue", main = "Difference Closing Price")
Decomposition of Additive Time Series. This plot shows a steady rise in the trend. The seasonal component also displays a clear recurring pattern.
# Decomposition of Additive Time Series
# Fix: the original `start = 2015-11-15` is arithmetic (2015 - 11 - 15 = 1989),
# not a date. The sample actually begins 2016-11-15, i.e. day 320 of 2016.
# NOTE(review): frequency = 365 treats the index as calendar-daily; trading-day
# data has roughly 252 observations per year — confirm which is intended.
nflx_ts = ts(adj_nflx, frequency = 365, start = c(2016, 320))
nflx_de = decompose(nflx_ts)
plot(nflx_de)
Logged Difference
The graphs show that the logged difference shows a more balanced variance. Thus, we will explore log_diff for modeling.
Observation/Note: Volatility is observed even after differencing.
# Calculate the First Degree Difference
# WARNING(review): the name `diff` shadows base::diff() in this environment.
# Calls such as diff(diff) below still resolve (R searches for a function
# separately), but renaming the variable (e.g. diff1) would be safer.
diff = diff(adj_nflx)
log_diff = diff(log(adj_nflx))
sqrt_diff = diff(sqrt(adj_nflx))
# Compare the three variance-stabilizing transforms side by side.
par(mfrow = c(3,1))
plot(diff, type = 'l', xlab = 'Time', ylab = 'Difference', main = 'First Degree Difference - Raw Data')
plot(log_diff, type = 'l', xlab = 'Time', ylab = 'Logged Difference', main = 'First Degree Difference - Logged Data')
plot(sqrt_diff, type = 'l', xlab = 'Time', ylab = 'Square Root Difference', main = 'First Degree Difference - Square-Root Data')
ACF First Degree Difference
# ACF/PACF of the first difference.
# NOTE(review): these use sqrt_diff, although the narrative selects the logged
# difference (log_diff) for modeling — confirm which transform was intended.
par(mfrow = c(2,1))
acf(sqrt_diff, main = 'Autocorrelation Function of First Differences', na.action = na.pass)
pacf(sqrt_diff, lag.max = 31, main = 'Partial Autocorrelation Function of First Differences', na.action = na.pass)
Second Degree Differencing
The second-degree logged difference shows a more balanced variance. Thus, we will use log_diff2 for modeling.
# Calculate the Second Degree Difference
# diff(diff) differences the *variable* `diff` (the first-degree series);
# base::diff() is still found despite the shadowed name.
diff2 = diff(diff)
log_diff2 = diff(log_diff)
sqrt_diff2 = diff(sqrt_diff)
# Plot all three second-degree differences for comparison.
par(mfrow = c(3,1))
plot(diff2, type = 'l', xlab = 'Time', ylab = '2nd Diff', main = 'Second Degree Difference - Raw Data')
plot(log_diff2, type = 'l', xlab = 'Time', ylab = '2nd Log Diff', main = 'Second Degree Difference - Logged Data')
plot(sqrt_diff2, type = 'l', xlab = 'Time', ylab = '2nd Diff - Square-Root', main = 'Second Degree Difference - Square-Root Data')
# Raw vs logged price levels.
# NOTE(review): plotting the full OHLCV object `nflx` draws all six columns
# (including volume) on one scale — presumably only the close was intended.
par(mfrow = c(2, 1))
plot(nflx, ylab = 'QEPS', type = 'o', col = 4, cex = .5, main = NULL)
mtext(side = 3, text = 'Stock Price Growth', line = 1.5, cex = 1, font = 2)
plot(log(nflx), ylab = 'log(QEPS)', type = 'o', col = 4, cex = .5, main = NULL)
Per EDA process, it’s noted that differencing with logging showed better stationarity. Thus, we will be using log_diff for first differencing and log_diff2 for second differencing for our models.
Parameters for the models will be chosen by ACF and PACF plots.
The models with different parameters will be tested. In addition, auto.arima will be tested for choosing the best models.
Diagnostic measures will be performed to see the residual’s correlation. Since we have noticed high volatility in the EDA process, we will move on to GARCH models to confirm the volatility of Netflix stock prices.
MODELS FOR FIRST DEGREE DIFFERENCING
ACF and PACF of diff_log
Per ACF, spikes at 1, 5, and 8. PACF tapers to zero. Thus, let’s try MA(1).
# Daily log returns, used as the first-differenced series for ARIMA orders.
FirstDiff <- diff(log(adj_nflx))
par(mfrow = c(2, 1))
acf(FirstDiff, na.action = na.pass, main = "ACF Plot of First Differencing")
pacf(FirstDiff, na.action = na.pass, main = NULL)
MODELS FOR SECOND DEGREE DIFFERENCING
ACF was cut off at lag 1. PACF tailed off. Let’s try MA(1)
# ACF/PACF of the second difference of the log prices.
par(mfrow = c(2, 1))
acf(diff(FirstDiff), na.action = na.pass, main = "ACF Plot of Second Differencing")
pacf(diff(FirstDiff), na.action = na.pass, main = NULL)
Now, we assigned the models’ names by different parameters.
Model1 = MA(1) for First Differencing Model2 = MA(1) for Second Differencing Model3 = auto.arima for Differencing for Logged Values
# Model1 = MA(1) for First Differencing
# astsa::sarima fits ARIMA(0,1,1) to the log prices, i.e. MA(1) on log returns.
Model1 = sarima((log(adj_nflx)), 0,1,1)
## initial value -3.733756
## iter 2 value -3.736647
## iter 3 value -3.736667
## iter 3 value -3.736667
## iter 3 value -3.736667
## final value -3.736667
## converged
## initial value -3.736665
## iter 1 value -3.736665
## final value -3.736665
## converged
# Print Model1: coefficient t-table, sigma^2, and AIC/AICc/BIC (below).
Model1
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## xreg = constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1 constant
## -0.0732 0.0014
## s.e. 0.0269 0.0006
##
## sigma^2 estimated as 0.000568: log likelihood = 2915.7, aic = -5825.4
##
## $degrees_of_freedom
## [1] 1256
##
## $ttable
## Estimate SE t.value p.value
## ma1 -0.0732 0.0269 -2.7208 0.0066
## constant 0.0014 0.0006 2.2801 0.0228
##
## $AIC
## [1] -4.630684
##
## $AICc
## [1] -4.630676
##
## $BIC
## [1] -4.618433
# Model2 = MA(1) for Second Differencing
# ARIMA(0,2,1) on log prices. The ma1 estimate of -1.000 in the output below
# (a non-invertible MA root) is a classic sign of over-differencing.
Model2 = sarima((log(adj_nflx)), 0,2,1)
## initial value -3.348754
## iter 2 value -3.595890
## iter 3 value -3.657550
## iter 4 value -3.722692
## iter 5 value -3.725012
## iter 6 value -3.725610
## iter 7 value -3.725624
## iter 8 value -3.725685
## iter 9 value -3.725702
## iter 10 value -3.725703
## iter 10 value -3.725703
## final value -3.725703
## converged
## initial value -3.727702
## iter 2 value -3.730423
## iter 3 value -3.730512
## iter 4 value -3.730519
## iter 4 value -3.730519
## final value -3.730519
## converged
# Print Model2's fit summary (note ma1 = -1.000, suggesting over-differencing).
Model2
## $fit
##
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S),
## include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc,
## REPORT = 1, reltol = tol))
##
## Coefficients:
## ma1
## -1.000
## s.e. 0.003
##
## sigma^2 estimated as 0.0005718: log likelihood = 2905.66, aic = -5807.31
##
## $degrees_of_freedom
## [1] 1256
##
## $ttable
## Estimate SE t.value p.value
## ma1 -1 0.003 -338.3381 0
##
## $AIC
## [1] -4.619979
##
## $AICc
## [1] -4.619977
##
## $BIC
## [1] -4.611807
# Model3 = Auto.arima for logged, first differencing
# auto.arima() selects ARIMA(0,1,2) with drift (per the summary below).
Model3 <- auto.arima(log(adj_nflx))
summary(Model3)
## Series: log(adj_nflx)
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.0787 0.0430 0.0014
## s.e. 0.0283 0.0276 0.0006
##
## sigma^2 estimated as 0.0005683: log likelihood=2916.91
## AIC=-5825.82 AICc=-5825.79 BIC=-5805.27
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 4.975572e-06 0.02380137 0.01665857 -6.499572e-05 0.2881782
## MASE ACF1
## Training set 0.9922833 0.00128343
Diagnostic Test for Model3
# Diagnostic Checking
# Ljung-Box p-value = 0.0026 (< 0.05) in the output below: some residual
# autocorrelation remains, which motivates the GARCH models later on.
checkresiduals(Model3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2) with drift
## Q* = 21.964, df = 7, p-value = 0.002577
##
## Model df: 3. Total lags used: 10
# 30-step-ahead forecast of the log prices (fan chart via autoplot).
# `preds` is reused below to build the back-transformed price table.
preds <- forecast(Model3, h = 30)
autoplot(preds)
# Backtest ARIMA model
# NTS::backtest re-estimates from origin 80 and reports out-of-sample RMSE,
# MAE, and bias for horizons 1..30 (all on the log scale; output below).
results <- backtest(Model3, log(adj_nflx), orig = 80, h = 30)
## [1] "RMSE of out-of-sample forecasts"
## [1] 0.02450318 0.03339095 0.04090226 0.04760317 0.05399938 0.05898188
## [7] 0.06332254 0.06752260 0.07072667 0.07382688 0.07646482 0.07891052
## [13] 0.08161427 0.08436618 0.08725607 0.08984353 0.09277125 0.09545512
## [19] 0.09855400 0.10134673 0.10382177 0.10642313 0.10855922 0.11083121
## [25] 0.11284425 0.11497889 0.11711686 0.11913397 0.12102913 0.12294952
## [1] "Mean absolute error of out-of-sample forecasts"
## [1] 0.01726316 0.02443677 0.03013491 0.03524700 0.03985101 0.04409672
## [7] 0.04734963 0.05052035 0.05305025 0.05589673 0.05807326 0.06013008
## [13] 0.06244413 0.06496971 0.06735709 0.06992802 0.07247516 0.07473823
## [19] 0.07695234 0.07922764 0.08113685 0.08311863 0.08479285 0.08695604
## [25] 0.08855427 0.09029957 0.09217862 0.09391302 0.09527088 0.09680530
## [1] "Bias of out-of-sample forecasts"
## [1] 0.001277000 0.002507773 0.003792010 0.005071183 0.006359524 0.007638182
## [7] 0.008933103 0.010259864 0.011616813 0.012962110 0.014303527 0.015650820
## [13] 0.016971505 0.018271686 0.019583302 0.020906558 0.022232131 0.023552783
## [19] 0.024839846 0.026151707 0.027458753 0.028753056 0.030059930 0.031368503
## [25] 0.032646930 0.033954124 0.035292652 0.036624957 0.037960275 0.039282467
# NOTE(review): exp(mean(...)) of log-scale errors gives a multiplicative
# (geometric-mean) error factor, not an "average RMSE" in dollar units —
# confirm this is the intended interpretation before reporting.
print(paste("Average RMSE: ", round(exp(mean(results$rmse)), digits = 3)))
## [1] "Average RMSE: 1.089"
print(paste("Average MAE: ", round(exp(mean(results$mabso)), digits = 3)))
## [1] "Average MAE: 1.068"
Predicted Prices for Next 30 Days
# Back-transform forecasts to price levels: exp() undoes the log transform
# applied before modeling (point forecast plus 80%/95% intervals).
preds_df = data.frame(preds)
predicted_prices <- exp(preds_df)
predicted_prices
## Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 1260 681.6063 661.0972 702.7515 650.4914 714.2094
## 1261 682.4630 654.6942 711.4095 640.4546 727.2267
## 1262 683.4341 649.5005 719.1405 632.2247 738.7914
## 1263 684.4065 645.3018 725.8811 625.5137 748.8443
## 1264 685.3804 641.7353 731.9938 619.7676 757.9394
## 1265 686.3557 638.6161 737.6640 614.7035 766.3598
## 1266 687.3323 635.8344 743.0012 610.1538 774.2731
## 1267 688.3103 633.3189 748.0768 606.0096 781.7882
## 1268 689.2898 631.0201 752.9401 602.1953 788.9805
## 1269 690.2706 628.9022 757.6272 598.6562 795.9050
## 1270 691.2528 626.9382 762.1651 595.3510 802.6028
## 1271 692.2364 625.1070 766.5747 592.2477 809.1061
## 1272 693.2214 623.3921 770.8727 589.3208 815.4403
## 1273 694.2078 621.7798 775.0726 586.5497 821.6260
## 1274 695.1956 620.2592 779.1855 583.9176 827.6801
## 1275 696.1848 618.8209 783.2207 581.4103 833.6168
## 1276 697.1755 617.4572 787.1860 579.0159 839.4479
## 1277 698.1675 616.1613 791.0881 576.7242 845.1837
## 1278 699.1610 614.9276 794.9326 574.5265 850.8329
## 1279 700.1558 613.7511 798.7247 572.4152 856.4032
## 1280 701.1521 612.6273 802.4687 570.3836 861.9012
## 1281 702.1498 611.5525 806.1685 568.4258 867.3328
## 1282 703.1489 610.5232 809.8274 566.5366 872.7033
## 1283 704.1494 609.5364 813.4485 564.7115 878.0172
## 1284 705.1514 608.5893 817.0346 562.9463 883.2787
## 1285 706.1548 607.6796 820.5880 561.2372 888.4917
## 1286 707.1596 606.8050 824.1111 559.5809 893.6593
## 1287 708.1659 605.9634 827.6058 557.9744 898.7848
## 1288 709.1735 605.1532 831.0740 556.4148 903.8708
## 1289 710.1826 604.3726 834.5173 554.8996 908.9201
GARCH Models - Look at Volatility of Stock Prices
GARCH Model1 with ARMA order (1,1)
# sGARCH(1,1) variance dynamics with an ARMA(1,1) mean model and
# Student-t conditional distribution.
# NOTE(review): the model is fit to price *levels* (adj_nflx), not returns —
# the ar1 estimate of 1.000 in the output below reflects the unit root in
# prices; fitting to log returns is the usual convention — confirm intent.
nflx_garch <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)), mean.model = list(armaOrder=c(1,1)), distribution.model = "std")
nflx_garch1 <-ugarchfit(spec=nflx_garch, data=adj_nflx)
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean =
## modelinc[1], : possible convergence problem: optim gave code = 1
nflx_garch1
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(1,0,1)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 113.690205 4.384903 25.9276 0.000000
## ar1 1.000000 0.000796 1256.1653 0.000000
## ma1 -0.059036 0.026992 -2.1871 0.028734
## omega 0.152889 0.102623 1.4898 0.136274
## alpha1 0.081915 0.012078 6.7820 0.000000
## beta1 0.917085 0.013259 69.1661 0.000000
## shape 4.211412 0.401749 10.4827 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 113.690205 0.385913 294.6003 0.000000
## ar1 1.000000 0.000874 1143.9489 0.000000
## ma1 -0.059036 0.028456 -2.0747 0.038019
## omega 0.152889 0.129386 1.1816 0.237345
## alpha1 0.081915 0.016830 4.8674 0.000001
## beta1 0.917085 0.020337 45.0946 0.000000
## shape 4.211412 0.421497 9.9916 0.000000
##
## LogLikelihood : -4224.874
##
## Information Criteria
## ------------------------------------
##
## Akaike 6.7226
## Bayes 6.7512
## Shibata 6.7225
## Hannan-Quinn 6.7333
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.6016 0.4380
## Lag[2*(p+q)+(p+q)-1][5] 3.3735 0.2619
## Lag[4*(p+q)+(p+q)-1][9] 6.9326 0.1346
## d.o.f=2
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.170 0.6801
## Lag[2*(p+q)+(p+q)-1][5] 1.073 0.8429
## Lag[4*(p+q)+(p+q)-1][9] 1.499 0.9556
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.6494 0.500 2.000 0.4203
## ARCH Lag[5] 0.6968 1.440 1.667 0.8244
## ARCH Lag[7] 0.8238 2.315 1.543 0.9404
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 7.6532
## Individual Statistics:
## mu 0.0005036
## ar1 0.7415155
## ma1 0.2570143
## omega 0.5953135
## alpha1 1.2333304
## beta1 0.8482511
## shape 1.0509154
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 1.69 1.9 2.35
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.4964 0.6197
## Negative Sign Bias 0.4132 0.6795
## Positive Sign Bias 1.1354 0.2564
## Joint Effect 1.5176 0.6782
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 26.29 0.1223
## 2 30 27.14 0.5642
## 3 40 35.07 0.6495
## 4 50 51.37 0.3812
##
##
## Elapsed time : 0.3072479
GARCH MODEL2 WITH ARMA (2,2)
# Same sGARCH(1,1) variance spec, now with an ARMA(2,2) mean model
# (also fit to price levels; see the review note on the first GARCH fit).
nflx_garch2 <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)), mean.model = list(armaOrder=c(2,2)), distribution.model = "std")
nflx_garch2 <-ugarchfit(spec=nflx_garch2, data=adj_nflx)
# Print the full fit summary (parameters, Ljung-Box, ARCH LM, Nyblom, etc.).
nflx_garch2
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(2,0,2)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 113.204458 0.410537 275.7470 0.000000
## ar1 0.022298 0.004577 4.8720 0.000001
## ar2 0.981119 0.004625 212.1114 0.000000
## ma1 0.915764 0.000015 60943.5104 0.000000
## ma2 -0.070207 0.000592 -118.5324 0.000000
## omega 0.155211 0.103317 1.5023 0.133023
## alpha1 0.082929 0.012137 6.8326 0.000000
## beta1 0.916071 0.013341 68.6661 0.000000
## shape 4.166105 0.391210 10.6493 0.000000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 113.204458 0.294881 383.8983 0.000000
## ar1 0.022298 0.005400 4.1294 0.000036
## ar2 0.981119 0.005623 174.4833 0.000000
## ma1 0.915764 0.000013 68575.9719 0.000000
## ma2 -0.070207 0.000415 -169.3417 0.000000
## omega 0.155211 0.129171 1.2016 0.229520
## alpha1 0.082929 0.016732 4.9561 0.000001
## beta1 0.916071 0.020324 45.0734 0.000000
## shape 4.166105 0.413512 10.0749 0.000000
##
## LogLikelihood : -4221.8
##
## Information Criteria
## ------------------------------------
##
## Akaike 6.7209
## Bayes 6.7576
## Shibata 6.7208
## Hannan-Quinn 6.7347
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.9913 3.194e-01
## Lag[2*(p+q)+(p+q)-1][11] 8.9600 8.242e-06
## Lag[4*(p+q)+(p+q)-1][19] 13.9519 5.984e-02
## d.o.f=4
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1380 0.7103
## Lag[2*(p+q)+(p+q)-1][5] 0.9734 0.8657
## Lag[4*(p+q)+(p+q)-1][9] 1.3741 0.9652
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.5714 0.500 2.000 0.4497
## ARCH Lag[5] 0.6049 1.440 1.667 0.8523
## ARCH Lag[7] 0.7411 2.315 1.543 0.9517
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 7.3642
## Individual Statistics:
## mu 0.01413
## ar1 0.05526
## ar2 0.03545
## ma1 0.12500
## ma2 0.08744
## omega 0.60695
## alpha1 1.29812
## beta1 0.87017
## shape 1.07678
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.1 2.32 2.82
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.7645 0.4447
## Negative Sign Bias 0.2577 0.7967
## Positive Sign Bias 1.2745 0.2027
## Joint Effect 1.8533 0.6034
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 18.16 0.5120
## 2 30 22.61 0.7940
## 3 40 31.33 0.8042
## 4 50 37.47 0.8855
##
##
## Elapsed time : 0.3916569
GARCH MODEL3 WITH ARMA ORDER (3,3)
# Same sGARCH(1,1) variance spec with an ARMA(3,3) mean model.
nflx_garch3 <- ugarchspec(variance.model = list(model="sGARCH",garchOrder=c(1,1)), mean.model = list(armaOrder=c(3,3)), distribution.model = "std")
nflx_garch3 <-ugarchfit(spec=nflx_garch3, data=adj_nflx)
## Warning in arima(data, order = c(modelinc[2], 0, modelinc[3]), include.mean =
## modelinc[1], : possible convergence problem: optim gave code = 1
# Print the full fit summary for comparison with the other two fits.
nflx_garch3
##
## *---------------------------------*
## * GARCH Model Fit *
## *---------------------------------*
##
## Conditional Variance Dynamics
## -----------------------------------
## GARCH Model : sGARCH(1,1)
## Mean Model : ARFIMA(3,0,3)
## Distribution : std
##
## Optimal Parameters
## ------------------------------------
## Estimate Std. Error t value Pr(>|t|)
## mu 112.980156 1.406618 80.3205 0.00000
## ar1 1.506436 0.001629 924.9161 0.00000
## ar2 -1.357242 0.001712 -792.6000 0.00000
## ar3 0.853159 0.001952 436.9622 0.00000
## ma1 -0.572363 0.027208 -21.0362 0.00000
## ma2 0.897680 0.019185 46.7910 0.00000
## ma3 -0.037132 0.027646 -1.3431 0.17923
## omega 0.155388 0.103996 1.4942 0.13513
## alpha1 0.082432 0.012089 6.8186 0.00000
## beta1 0.916568 0.013289 68.9721 0.00000
## shape 4.197099 0.399745 10.4994 0.00000
##
## Robust Standard Errors:
## Estimate Std. Error t value Pr(>|t|)
## mu 112.980156 0.531122 212.7198 0.000000
## ar1 1.506436 0.000627 2401.4776 0.000000
## ar2 -1.357242 0.000584 -2325.6007 0.000000
## ar3 0.853159 0.000228 3746.9702 0.000000
## ma1 -0.572363 0.028687 -19.9518 0.000000
## ma2 0.897680 0.019199 46.7564 0.000000
## ma3 -0.037132 0.030335 -1.2241 0.220926
## omega 0.155388 0.131475 1.1819 0.237252
## alpha1 0.082432 0.016700 4.9361 0.000001
## beta1 0.916568 0.020194 45.3886 0.000000
## shape 4.197099 0.425247 9.8698 0.000000
##
## LogLikelihood : -4221.973
##
## Information Criteria
## ------------------------------------
##
## Akaike 6.7243
## Bayes 6.7692
## Shibata 6.7242
## Hannan-Quinn 6.7412
##
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 1.031 0.3099
## Lag[2*(p+q)+(p+q)-1][17] 8.438 0.8273
## Lag[4*(p+q)+(p+q)-1][29] 16.372 0.2998
## d.o.f=6
## H0 : No serial correlation
##
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
## statistic p-value
## Lag[1] 0.1312 0.7172
## Lag[2*(p+q)+(p+q)-1][5] 0.9880 0.8624
## Lag[4*(p+q)+(p+q)-1][9] 1.3847 0.9645
## d.o.f=2
##
## Weighted ARCH LM Tests
## ------------------------------------
## Statistic Shape Scale P-Value
## ARCH Lag[3] 0.5769 0.500 2.000 0.4475
## ARCH Lag[5] 0.6430 1.440 1.667 0.8408
## ARCH Lag[7] 0.7626 2.315 1.543 0.9488
##
## Nyblom stability test
## ------------------------------------
## Joint Statistic: 8.041
## Individual Statistics:
## mu 0.01276
## ar1 0.04650
## ar2 0.03577
## ar3 0.03583
## ma1 0.52343
## ma2 0.53937
## ma3 0.12094
## omega 0.61567
## alpha1 1.23718
## beta1 0.85103
## shape 1.13167
##
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic: 2.49 2.75 3.27
## Individual Statistic: 0.35 0.47 0.75
##
## Sign Bias Test
## ------------------------------------
## t-value prob sig
## Sign Bias 0.8284 0.4076
## Negative Sign Bias 0.3194 0.7495
## Positive Sign Bias 1.3735 0.1699
## Joint Effect 2.2187 0.5283
##
##
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
## group statistic p-value(g-1)
## 1 20 15.58 0.6849
## 2 30 18.13 0.9415
## 3 40 29.61 0.8612
## 4 50 43.98 0.6764
##
##
## Elapsed time : 0.650609
Table to Show Akaike Values for the GARCH Models
The models labeled GARCH(2,2) and GARCH(3,3) — i.e., the sGARCH(1,1) fits with ARMA(2,2) and ARMA(3,3) mean models — had lower p-values for the weighted Ljung-Box test on standardized residuals.
GARCH(1,1) had the best metrics and outperformed the other two GARCH models. This is the model that will be used.
# Comparison table of Akaike values for the three GARCH fits.
# Note: the labels refer to the ARMA mean-model orders; the variance model is
# sGARCH(1,1) in all three fits.
GARCH_Models <- c("GARCH(1,1)", "GARCH(2,2)", "GARCH(3,3)")
# Fix: store the AIC values as numbers, not character strings, so they sort
# and compare correctly.
Akaike <- c(6.722, 6.721, 6.724)
gtable <- data.frame(GARCH_Models, Akaike)
gtable
## GARCH_Models Akaike
## 1 GARCH(1,1) 6.722
## 2 GARCH(2,2) 6.721
## 3 GARCH(3,3) 6.724
# Fix: the caption's bold tag was unclosed ("<b>...<b>"); close it properly.
kable(gtable, format = "html", caption = "<b>Garch Model - Akaike Values</b>", position = "center", align = "cc") %>%
  kable_styling(full_width = TRUE, position = "center") %>%
  kable_classic(html_font = "Times New Roman")
| GARCH_Models | Akaike |
|---|---|
| GARCH(1,1) | 6.722 |
| GARCH(2,2) | 6.721 |
| GARCH(3,3) | 6.724 |
Plot of the growth rate with the conditional standard deviation superimposed.
# Remake the GARCH model with fGarch::garchFit on the daily log returns
# ([-1] drops the leading NA from diff); Student-t conditional distribution,
# matching the rugarch spec above. Note this fit uses returns, unlike the
# ugarchfit calls above, which used price levels.
nflx_garch_mod <- garchFit(~arma(1,1) + garch(1, 1), data = diff(log(adj_nflx))[-1], trace = FALSE, cond.dist = "std")
## Warning: Using formula(x) is deprecated when x is a character vector of length > 1.
## Consider formula(paste(x, collapse = " ")) instead.
# which = 3: plot the series with two conditional standard deviations
# superimposed.
plot(nflx_garch_mod, which = 3)
FORECASTING
Bootstrap forecasting is used to forecast both the series and the conditional variances.
nflx_garch1 with arma order (1,1)
# Bootstrap forecast 30 steps ahead. method = "Partial" resamples the
# standardized residuals while keeping the parameter estimates fixed
# (c("Partial","Full")[1] selects "Partial").
nflx_predict <- ugarchboot(nflx_garch1, n.ahead=30,method=c("Partial","Full")[1])
# Print the bootstrap summary for the series and sigma paths (below).
nflx_predict
##
## *-----------------------------------*
## * GARCH Bootstrap Forecast *
## *-----------------------------------*
## Model : sGARCH
## n.ahead : 30
## Bootstrap method: partial
## Date (T[0]): 2021-11-15
##
## Series (summary):
## min q.25 mean q.75 max forecast[analytic]
## t+1 632.66 672.42 679.59 687.22 738.74 679.43
## t+2 538.55 669.35 679.89 690.52 791.50 679.43
## t+3 550.07 669.33 681.55 692.57 815.89 679.43
## t+4 557.29 667.68 682.10 694.91 833.07 679.43
## t+5 566.31 666.84 683.27 699.59 863.72 679.43
## t+6 548.26 665.41 684.00 700.69 886.41 679.43
## t+7 530.64 663.88 684.35 703.90 851.05 679.43
## t+8 520.55 662.59 685.23 707.08 863.74 679.43
## t+9 491.48 661.59 686.45 708.45 872.48 679.43
## t+10 492.82 661.98 687.63 709.53 1130.37 679.43
## .....................
##
## Sigma (summary):
## min q0.25 mean q0.75 max forecast[analytic]
## t+1 12.9669 12.967 12.967 12.967 12.967 12.967
## t+2 12.4238 12.452 12.910 12.956 21.034 12.966
## t+3 11.9071 12.133 12.934 13.051 34.935 12.966
## t+4 11.4188 11.821 12.866 13.079 33.488 12.965
## t+5 10.9436 11.556 12.879 13.270 32.144 12.965
## t+6 10.4945 11.300 12.904 13.338 35.923 12.964
## t+7 10.0757 11.107 12.976 13.481 40.109 12.963
## t+8 9.6731 10.907 12.975 13.468 38.791 12.963
## t+9 9.3999 10.802 12.941 13.602 38.318 12.962
## t+10 9.0854 10.641 13.031 13.821 38.587 12.962
## .....................
# Plot the bootstrapped series forecast (selection 2 of the ugarchboot plots).
plot(nflx_predict, which = 2)
Predicted Prices by GARCH Models
# Extract the bootstrap summary (min/quartiles/mean/max per horizon) as a
# data frame; horizons t+1..t+30 come out as columns (transposed below).
nflx_predict_df <- as.data.frame(nflx_predict, which="series", type="summary")
nflx_predict_df
## t+1 t+2 t+3 t+4 t+5 t+6 t+7 t+8
## min 632.6593 538.5469 550.0660 557.2893 566.3135 548.2573 530.6397 520.5545
## q.25 672.4154 669.3474 669.3329 667.6823 666.8359 665.4065 663.8832 662.5868
## mean 679.5908 679.8897 681.5500 682.0956 683.2690 684.0002 684.3509 685.2259
## q.75 687.2170 690.5204 692.5723 694.9061 699.5861 700.6930 703.9022 707.0761
## max 738.7356 791.5019 815.8925 833.0740 863.7238 886.4149 851.0502 863.7363
## t+9 t+10 t+11 t+12 t+13 t+14 t+15
## min 491.4779 492.8209 477.1918 476.1586 473.8575 478.1804 479.1904
## q.25 661.5907 661.9792 661.3764 661.7264 663.9297 661.3463 660.9731
## mean 686.4541 687.6263 687.6918 688.8259 690.0007 691.0194 691.9988
## q.75 708.4546 709.5256 711.6321 712.9928 714.6862 714.4267 716.3889
## max 872.4787 1130.3716 1043.3513 1149.3081 1030.4334 951.4708 899.6630
## t+16 t+17 t+18 t+19 t+20 t+21 t+22 t+23
## min 435.4373 434.9079 409.8845 430.0036 413.2136 401.4535 397.6713 389.2332
## q.25 661.6972 665.0289 663.2572 661.6271 665.8382 664.1485 663.6027 664.5859
## mean 693.1420 695.0255 696.0177 697.1949 697.7840 699.0310 700.4403 702.0279
## q.75 719.4653 724.9143 726.8301 729.0241 730.6865 732.0187 733.4152 734.0622
## max 892.3146 933.7354 932.3626 934.3327 918.2503 948.2123 967.5155 1004.0163
## t+24 t+25 t+26 t+27 t+28 t+29 t+30
## min 406.0174 388.9668 404.4906 366.8711 377.8398 411.9146 411.7406
## q.25 666.2850 663.7599 664.8050 666.1088 667.3232 665.0756 663.4948
## mean 703.9671 704.6951 705.8173 706.8436 707.9771 708.2310 709.2832
## q.75 739.0882 740.4818 741.6411 743.8928 744.2593 746.9312 748.5852
## max 1073.1545 1111.7471 1080.9183 1160.2615 1134.6811 1304.7720 1382.4621
# Transpose the summary so that each forecast horizon becomes a row.
pred_table <- nflx_predict_df %>% t() %>% as.data.frame()
pred_table
## min q.25 mean q.75 max
## t+1 632.6593 672.4154 679.5908 687.2170 738.7356
## t+2 538.5469 669.3474 679.8897 690.5204 791.5019
## t+3 550.0660 669.3329 681.5500 692.5723 815.8925
## t+4 557.2893 667.6823 682.0956 694.9061 833.0740
## t+5 566.3135 666.8359 683.2690 699.5861 863.7238
## t+6 548.2573 665.4065 684.0002 700.6930 886.4149
## t+7 530.6397 663.8832 684.3509 703.9022 851.0502
## t+8 520.5545 662.5868 685.2259 707.0761 863.7363
## t+9 491.4779 661.5907 686.4541 708.4546 872.4787
## t+10 492.8209 661.9792 687.6263 709.5256 1130.3716
## t+11 477.1918 661.3764 687.6918 711.6321 1043.3513
## t+12 476.1586 661.7264 688.8259 712.9928 1149.3081
## t+13 473.8575 663.9297 690.0007 714.6862 1030.4334
## t+14 478.1804 661.3463 691.0194 714.4267 951.4708
## t+15 479.1904 660.9731 691.9988 716.3889 899.6630
## t+16 435.4373 661.6972 693.1420 719.4653 892.3146
## t+17 434.9079 665.0289 695.0255 724.9143 933.7354
## t+18 409.8845 663.2572 696.0177 726.8301 932.3626
## t+19 430.0036 661.6271 697.1949 729.0241 934.3327
## t+20 413.2136 665.8382 697.7840 730.6865 918.2503
## t+21 401.4535 664.1485 699.0310 732.0187 948.2123
## t+22 397.6713 663.6027 700.4403 733.4152 967.5155
## t+23 389.2332 664.5859 702.0279 734.0622 1004.0163
## t+24 406.0174 666.2850 703.9671 739.0882 1073.1545
## t+25 388.9668 663.7599 704.6951 740.4818 1111.7471
## t+26 404.4906 664.8050 705.8173 741.6411 1080.9183
## t+27 366.8711 666.1088 706.8436 743.8928 1160.2615
## t+28 377.8398 667.3232 707.9771 744.2593 1134.6811
## t+29 411.9146 665.0756 708.2310 746.9312 1304.7720
## t+30 411.7406 663.4948 709.2832 748.5852 1382.4621